Modeling
Diabetes damages arteries and makes them targets for hardening, called atherosclerosis, which can then cause high blood pressure. \(\\\) Diabetes confounders: age, gender, TC, BMI \(\\\) Hypertension confounders: serum total cholesterol (TC), serum HDL-cholesterol (HDL-C), body mass index (BMI) \(\\\)
1. Linear models
dat <- diabetes %>% mutate(w_h_ratio=waist/hip) %>% filter(!is.na(w_h_ratio) & !is.na(glyhb) & !is.na(chol))
dat$female[dat$gender=="female"] <- 1
dat$female[dat$gender=="male"] <- 0
dat["BMI"]=(dat$weight*0.453592)/(dat$height*0.0254)^2
dat <- dat %>% filter(!is.na(BMI))
dat <- dat[order(dat$BMI),]
scatter.smooth(dat$w_h_ratio, dat$glyhb, col="brown", xlab = "waist/hip ratio",
ylab = "Glycosylated Hemoglobin")
scatter.smooth(dat$age, dat$glyhb, col="blue", xlab = "Age",
ylab = "Glycosylated Hemoglobin")
boxplot(dat$glyhb~ dat$female, col="orange",xlab = "Female=1", ylab = "Glycosylated Hemoglobin")
hist(dat$glyhb, xlab = "Glycosolated Hemoglobin", main = "Histogram of glycosylated hemoglobin", col="gray")
hist(dat$w_h_ratio,xlab="Waist/hip ratio", main="Histogram of waist/hip ratio", col="blue")




# log transformaion on glycosylated hemoglobin (glyhb)
dat$ln_glyhb <- log(dat$glyhb)
scatter.smooth(dat$w_h_ratio, dat$ln_glyhb, col="brown", xlab = "waist/hip ratio",
ylab = "ln(Glycosylated Hemoglobin)")
hist(dat$ln_glyhb, xlab = "ln(Glycosolated Hemoglobin)", main = "Histogram of ln(glycosylated hemoglobin)", col="gray")


library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:DescTools':
##
## MAE, RMSE
## The following object is masked from 'package:purrr':
##
## lift
# preprocl <- preProcess(dat[,c("w_h_ratio", "chol", "BMI", "age","ln_glyhb")],method = c("range"))
#dat_nor <- predict(preprocl, dat[,c("w_h_ratio", "chol", "BMI", "age", "ln_glyhb")])
#hist(dat_nor$ln_glyhb)
# mod.normal <- lm(ln_glyhb ~ w_h_ratio + age + chol+I(chol^2)+BMI+I(BMI^2), data=dat_nor)
linear model coefficients
mod.wh <- lm(ln_glyhb ~ w_h_ratio,data=dat)
mod.age <- lm(ln_glyhb ~ age, data=dat)
mod.wh2 <- lm(ln_glyhb ~ w_h_ratio + I(w_h_ratio^2), data=dat) #no need
mod.wh_age <- lm(ln_glyhb ~ w_h_ratio + age, data=dat)
mod.wh_gender <- lm(ln_glyhb ~ w_h_ratio + female, data=dat)
mod.wh2_age <- lm(ln_glyhb ~ w_h_ratio + I(w_h_ratio^2)+ age, data=dat)
mod.wh2_age2 <- lm(ln_glyhb ~ w_h_ratio + I(w_h_ratio^2)+age+I(age^2), data=dat)
mod.int <- lm(ln_glyhb ~ w_h_ratio + age + w_h_ratio*age, data=dat) #no need
mod.spline_age <- lm(ln_glyhb~ bSpline(w_h_ratio, df=4) +age , data=dat)
mod.nspline_age <- lm(ln_glyhb~ ns(w_h_ratio, df=4) + age,data=dat)
mod.spline <- lm(ln_glyhb~bSpline(w_h_ratio,df=4), data=dat)
mod.wh_age_chol <- lm(ln_glyhb ~ w_h_ratio + age+ chol, data=dat)
mod.wh_age_chol_bmi <- lm(ln_glyhb ~ w_h_ratio + age+ chol + BMI, data=dat)
mod.wh_age_chol2_bmi <- lm(ln_glyhb~ w_h_ratio+age+chol+I(chol^2)+BMI, data=dat)
mod.wh_age_chol2_bmi2 <- lm(ln_glyhb~ w_h_ratio+age+chol+I(chol^2)+BMI+I(BMI^2), data=dat)
mod.int2 <- lm(ln_glyhb ~ w_h_ratio + age + chol + BMI + chol*w_h_ratio, data=dat) #no need
mod.int3 <- lm(ln_glyhb ~ w_h_ratio + age + chol+ BMI + BMI*w_h_ratio,data=dat) #no need
tidy(mod.wh) %>% mutate('2.5%'=confint(mod.wh)[,1]) %>% mutate('97.5%'=confint(mod.wh)[,2])
tidy(mod.wh2) %>% mutate('2.5%'=confint(mod.wh2)[,1]) %>% mutate('97.5%'=confint(mod.wh2)[,2])
tidy(mod.age) %>% mutate('2.5%'=confint(mod.age)[,1]) %>% mutate('97.5%'=confint(mod.age)[,2])
tidy(mod.wh_age) %>% mutate('2.5%'=confint(mod.wh_age)[,1]) %>% mutate('97.5%'=confint(mod.wh_age)[,2])
tidy(mod.wh_gender) %>% mutate('2.5%'=confint(mod.wh_gender)[,1]) %>% mutate('97.5%'=confint(mod.wh_gender)[,2])
tidy(mod.wh2_age) %>% mutate('2.5%'=confint(mod.wh2_age)[,1]) %>% mutate('97.5%'=confint(mod.wh2_age)[,2])
tidy(mod.wh2_age2) %>% mutate('2.5%'=confint(mod.wh2_age2)[,1]) %>% mutate('97.5%'=confint(mod.wh2_age2)[,2])
tidy(mod.int) #no need
tidy(mod.spline_age) %>% mutate('2.5%'=confint(mod.spline_age)[,1]) %>% mutate('97.5%'=confint(mod.spline_age)[,2])
tidy(mod.nspline_age) %>% mutate('2.5%'=confint(mod.nspline_age)[,1]) %>% mutate('97.5%'=confint(mod.nspline_age)[,2])
tidy(mod.wh_age_chol)
tidy(mod.wh_age_chol_bmi)
tidy(mod.wh_age_chol2_bmi)
tidy(mod.wh_age_chol2_bmi2) %>% mutate('2.5%'=confint(mod.wh_age_chol2_bmi2)[,1]) %>%
mutate('97.5%'=confint(mod.wh_age_chol2_bmi2)[,2])
tidy(mod.int2) #no need
tidy(mod.int3) #no need
anova(mod.wh_age,mod.spline_age)
anova(mod.wh_age, mod.nspline_age)
#imputated models
mod.imp.wh <- with(tempData, lm(lnglyhb ~ w_h_ratio))
mod.imp.wh_age <- with(tempData,lm(lnglyhb ~ w_h_ratio+age))
mod.imp.wh_age_chol <- with(tempData, lm(lnglyhb ~w_h_ratio + age+chol))
mod.imp.wh_age_chol2_bmi2 <- with(tempData, lm(lnglyhb ~ w_h_ratio +age+chol+I(chol^2)+BMI+I(BMI^2)))
summary(pool(mod.imp.wh_age_chol2_bmi2))
## term estimate std.error statistic df p.value
## 1 (Intercept) 9.770420e-01 3.150126e-01 3.101597 325.7748 2.093038e-03
## 2 w_h_ratio 3.762357e-01 2.038268e-01 1.845860 385.8979 6.567824e-02
## 3 age 6.315271e-03 9.145093e-04 6.905639 382.6038 2.090861e-11
## 4 chol -4.240503e-03 1.718675e-03 -2.467310 392.2344 1.403945e-02
## 5 I(chol^2) 1.148041e-05 3.673835e-06 3.124912 393.3280 1.910499e-03
## 6 BMI 2.239944e-02 1.477827e-02 1.515701 165.8391 1.314987e-01
## 7 I(BMI^2) -2.562823e-04 2.287887e-04 -1.120170 191.8205 2.640419e-01
pool.r.squared(mod.imp.wh_age_chol2_bmi2)
## est lo 95 hi 95 fmi
## R^2 0.2199923 0.1509404 0.2942487 NaN
pool.compare(mod.imp.wh_age_chol2_bmi2, mod.imp.wh_age_chol, method = "wald")
## Warning: 'pool.compare' is deprecated.
## Use 'D1' instead.
## See help("Deprecated")
## $call
## pool.compare(fit1 = mod.imp.wh_age_chol2_bmi2, fit0 = mod.imp.wh_age_chol,
## method = "wald")
##
## $call11
## with.mids(data = tempData, expr = lm(lnglyhb ~ w_h_ratio + age +
## chol + I(chol^2) + BMI + I(BMI^2)))
##
## $call12
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
##
## $call01
## with.mids(data = tempData, expr = lm(lnglyhb ~ w_h_ratio + age +
## chol))
##
## $call02
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
##
## $method
## [1] "wald"
##
## $nmis
## id glyhb lnglyhb w_h_ratio age BMI chol hdl
## 0 13 13 2 0 6 1 1
## diabetes outcome gender
## 13 13 0
##
## $m
## [1] 5
##
## $qbar1
## (Intercept) w_h_ratio age chol I(chol^2)
## 9.770420e-01 3.762357e-01 6.315271e-03 -4.240503e-03 1.148041e-05
## BMI I(BMI^2)
## 2.239944e-02 -2.562823e-04
##
## $qbar0
## (Intercept) w_h_ratio age chol
## 0.701572500 0.505540445 0.005913854 0.001150394
##
## $ubar1
## [1] 9.515211e-02 4.111503e-02 8.254534e-07 2.943818e-06 1.347658e-11
## [6] 1.938050e-04 4.721607e-08
##
## $ubar0
## [1] 3.246926e-02 4.126520e-02 8.487819e-07 1.072253e-07
##
## $deviances
## NULL
##
## $Dm
## [,1]
## [1,] 4.250771
##
## $rm
## [1] 0.0790081
##
## $df1
## [1] 3
##
## $df2
## [1] 1070.747
##
## $pvalue
## [,1]
## [1,] 0.005375078
Linear model compare
plot(mod.wh_age_chol2_bmi2,which=c(1,2,3))
# Cook's Distance Plot
ols_plot_cooksd_chart(mod.wh_age_chol_bmi) #id 2778, influential & outlier
# DFFITS Plot
ols_plot_dffits(mod.wh_age_chol_bmi)
# Leverage Plot (outlier)
ols_plot_resid_lev(mod.wh_age_chol_bmi)
dat %>% filter(id==2778) # very high glyhb & chol in terms of her age
## id chol stab.glu hdl ratio glyhb location age gender height weight frame
## 1 2778 443 185 23 19.3 14.31 Buckingham 51 female 70 235 medium
## bp.1s bp.1d bp.2s bp.2d waist hip time.ppn outcome w_h_ratio female BMI
## 1 158 98 148 88 43 48 420 3 0.8958333 1 33.71862
## ln_glyhb
## 1 2.660959
dat_eli <- dat %>% filter(!id==2778)
mod.eli <- lm(ln_glyhb~ w_h_ratio +age+chol+I(chol^2)+BMI+I(BMI^2), data=dat_eli)






| ln_glyhb ~ w_h_ratio |
0.0573543 |
0.0548671 |
0.3018287 |
174.4346 |
186.2630 |
| ln_glyhb ~ age |
0.1439173 |
0.1416585 |
0.2876366 |
137.7354 |
149.5638 |
| ln_glyhb ~ w_h_ratio + age |
0.1627443 |
0.1583143 |
0.2844562 |
131.2629 |
147.0341 |
| ln_glyhb ~ w_h_ratio + I(w_h_ratio^2) + age |
0.1627459 |
0.1560834 |
0.2844559 |
133.2622 |
152.9762 |
| ln_glyhb ~ w_h_ratio + I(w_h_ratio^2) + age + I(age^2) |
0.1679803 |
0.1591290 |
0.2835653 |
132.8728 |
156.5295 |
| ln_glyhb ~ bSpline(w_h_ratio) + age |
0.1645070 |
0.1533671 |
0.2841565 |
136.4599 |
164.0595 |
| ln_glyhb ~ ns(w_h_ratio) + age |
0.1628954 |
0.1517340 |
0.2844305 |
137.1942 |
164.7937 |
| ln_glyhb ~ w_h_ratio + age + chol |
0.1931040 |
0.1866830 |
0.2792512 |
119.1908 |
138.9048 |
| ln_glyhb ~ w_h_ratio + age + chol+ bmi |
0.2100817 |
0.2016783 |
0.2762977 |
113.0887 |
136.7455 |
| ln_glyhb ~ w_h_ratio + age + chol + I(chol^2)+ bmi |
0.2259944 |
0.2156744 |
0.2735006 |
107.3352 |
134.9348 |
| ln_glyhb ~ w_h_ratio + age + chol+ I(chol^2) + bmi+I(bmi^2) |
0.2285425 |
0.2161662 |
0.2730500 |
108.0788 |
139.6212 |
| ln_glyhb ~ eliminate influential point |
0.2091315 |
0.1964098 |
0.2730504 |
107.8381 |
139.3595 |
ggplot(data=dat, aes(x=w_h_ratio,y=ln_glyhb))+
geom_point(alpha=0.5,col=ifelse(dat$female==1,"hot pink","blue"))+
geom_line(aes(y=predict(mod.wh_gender),col=ifelse(dat$female==1,"wh+female","wh+male")))+
geom_line(aes(y=predict(mod.wh),col="wh"))+
geom_line(aes(y=predict(mod.wh2),col="wh2"))+
scale_color_manual(name="Models", values = c("wh"="green", "wh+male"="blue", "wh+female"="hot pink",
"wh2"="black"))+
theme_bw()
## Warning: Use of `dat$female` is discouraged. Use `female` instead.

2. Logistic, Multinomial, Ordinal models
- Logistic model– have/don’t have diabetes Have diabetes if value of glycosylated hemoglobin > 6.50 DCCT%, otherwise, don’t have diabetes. Then, the outcome would be binomial, either have or don’t have the diabetes.
dat$diabetes = ifelse(dat$glyhb > 6.50, 1,0) #logistic model
mod_lg <- glm(diabetes ~ w_h_ratio, family=binomial(),data=dat)
mod_lg2 <- glm(diabetes ~ w_h_ratio + I(w_h_ratio^2), family=binomial(), data=dat)
tidy(mod_lg) %>% mutate('2.5%'=confint(mod_lg)[,1]) %>% mutate('97.5%'=confint(mod_lg)[,2])
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## # A tibble: 2 x 7
## term estimate std.error statistic p.value `2.5%` `97.5%`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -8.10 1.73 -4.69 0.00000277 -11.6 -4.78
## 2 w_h_ratio 7.25 1.90 3.81 0.000138 3.57 11.1
tidy(mod_lg2) %>% mutate('2.5%'=confint(mod_lg2)[,1]) %>% mutate('97.5%'=confint(mod_lg2)[,2])
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## # A tibble: 3 x 7
## term estimate std.error statistic p.value `2.5%` `97.5%`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -25.8 15.6 -1.65 0.0982 -59.0 2.94
## 2 w_h_ratio 45.9 33.8 1.36 0.174 -16.6 118.
## 3 I(w_h_ratio^2) -21.0 18.3 -1.15 0.251 -59.7 12.9
anova(mod_lg, mod_lg2, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: diabetes ~ w_h_ratio
## Model 2: diabetes ~ w_h_ratio + I(w_h_ratio^2)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 379 326.62
## 2 378 325.21 1 1.4153 0.2342
#imputated models
mod.imp.lg <- with(data = tempData, glm(diabetes ~ w_h_ratio, family=binomial()))
mod.imp.lg_chol_bmi <- with(data=tempData, glm(diabetes ~ w_h_ratio+age+chol+BMI, family=binomial()))
summary(pool(mod.imp.lg_chol_bmi))
## term estimate std.error statistic df p.value
## 1 (Intercept) -10.864499534 2.011600755 -5.400922 386.2862 1.161069e-07
## 2 w_h_ratio 3.690012889 2.002281353 1.842904 378.0120 6.612578e-02
## 3 age 0.046822866 0.010177551 4.600603 204.8704 7.374940e-06
## 4 chol 0.008985799 0.003172021 2.832831 354.5841 4.877496e-03
## 5 BMI 0.054200020 0.023166062 2.339630 184.8318 2.037072e-02
pool.compare(mod.imp.lg_chol_bmi, mod.imp.lg,method="likelihood")
## Warning: 'pool.compare' is deprecated.
## Use 'D1' instead.
## See help("Deprecated")
## $call
## pool.compare(fit1 = mod.imp.lg_chol_bmi, fit0 = mod.imp.lg, method = "likelihood")
##
## $call11
## with.mids(data = tempData, expr = glm(diabetes ~ w_h_ratio +
## age + chol + BMI, family = binomial()))
##
## $call12
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
##
## $call01
## with.mids(data = tempData, expr = glm(diabetes ~ w_h_ratio, family = binomial()))
##
## $call02
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
##
## $method
## [1] "likelihood"
##
## $nmis
## id glyhb lnglyhb w_h_ratio age BMI chol hdl
## 0 13 13 2 0 6 1 1
## diabetes outcome gender
## 13 13 0
##
## $m
## [1] 5
##
## $qbar1
## (Intercept) w_h_ratio age chol BMI
## -10.864499534 3.690012889 0.046822866 0.008985799 0.054200020
##
## $qbar0
## (Intercept) w_h_ratio
## -7.570741 6.675263
##
## $ubar1
## [1] 3.999443e+00 3.939423e+00 9.408652e-05 9.764473e-06 4.819528e-04
##
## $ubar0
## [1] 2.831070 3.452928
##
## $deviances
## $deviances$dev1.M
## [1] 297.9033 312.9882 302.1334 308.0085 301.6038
##
## $deviances$dev0.M
## [1] 342.9807 356.4104 345.6270 348.6405 349.7884
##
## $deviances$dev1.L
## [1] 297.9948 313.1486 302.1584 308.2401 301.8658
##
## $deviances$dev0.L
## [1] 342.9813 356.4313 345.6333 348.6510 349.7927
##
##
## $Dm
## [1] 13.6769
##
## $rm
## [1] 0.07276765
##
## $df1
## [1] 3
##
## $df2
## [1] 1244.414
##
## $pvalue
## [1] 8.829767e-09
plot(dat$diabetes~dat$w_h_ratio,ylab="P(Diabetes)",xlab="waist/hip ratio", col="black")
lines(mod_lg$fitted.values~dat$w_h_ratio,type='p',col='blue')
lines(mod_lg2$fitted.values~ dat$w_h_ratio, type='p', col="red")
legend('bottomright',legend=c("linear","quadratic"),pch=c(1,1),col=c("blue","red"))

mod_lg3 <- glm(diabetes ~ w_h_ratio + age, family=binomial(),data=dat)
mod_lg4<- glm(diabetes ~ w_h_ratio + age + w_h_ratio*age, family=binomial(),data=dat)
mod_lg5 <- glm(diabetes ~ w_h_ratio + age + chol + BMI, family=binomial(), data=dat)
mod_lg6 <- glm(diabetes ~ w_h_ratio+age+chol+I(chol^2)+BMI, family = binomial(),data=dat)
mod_lg7 <- glm(diabetes ~ w_h_ratio+age+chol+I(chol^2)+BMI+I(BMI^2),family=binomial(),data=dat)
mod_int1 <- glm(diabetes ~ w_h_ratio +age+chol+BMI + w_h_ratio*chol, family = binomial(),data=dat) #no need
mod_int2 <- glm(diabetes ~ w_h_ratio + age + chol + BMI+w_h_ratio * BMI, family = binomial(),data=dat) #no need
library(ResourceSelection)
library(LogisticDx)
tidy(mod_lg3) %>% mutate('2.5%'=confint(mod_lg3)[,1]) %>% mutate('97.5%'=confint(mod_lg3)[,2])
tidy(mod_lg4) %>% mutate('2.5%'=confint(mod_lg4)[,1]) %>% mutate('97.5%'=confint(mod_lg4)[,2]) #no need
tidy(mod_lg5)
tidy(mod_lg6)
tidy(mod_lg7)
tidy(mod_int1) #no need
tidy(mod_int2) # no need
locoef<-exp(coef(mod_lg5)[2]*0.1)
exp(log(locoef) + c(-1, 1)*1.96*0.1*2.037622)
hoslem.test(dat$diabetes, fitted(mod_lg5), g=10)
gof(mod_lg5)
#imputated model
mod.imp.lg3 <- with(data = tempData, glm(diabetes~ w_h_ratio+age, family = binomial()))
mod.imp.lg5 <- with(data=tempData, glm(diabetes ~ w_h_ratio +age+ chol+BMI, family = binomial()))
summary(pool(mod.imp.lg5))
pool.compare(mod.imp.lg5, mod.imp.lg3, method = "likelihood")
| logit(P_diabetes) ~ w_h_ratio |
330.6225 |
338.5081 |
0.0441627 |
| logit(P_diabetes) ~ w_h_ratio + age |
304.5589 |
316.3873 |
0.1262887 |
| logit(P_diabetes) ~ w_h_ratio+ age+chol+bmi |
291.9100 |
311.6240 |
0.1750106 |
| logit(P_diabetes) ~ w_h_ratio+age+chol+I(chol^2)+bmi |
293.2562 |
316.9130 |
0.1769237 |
| logit(P_diabetes) ~ w_h_ratio + age + chol + I(chol^2)+ bmi+I(bmi^2) |
293.1390 |
320.7386 |
0.1831196 |
Multinomial Models
Based on the 2010 American Diabetes Association Standards of Medical Care in Diabetes, HbA1C < 5.7\(\%\) is diagnosed as normal, 5.7-6.4\(\%\) is diagnosed as prediabetes and > 6.5\(\%\) is diagnosed as diabetes.
library(nnet)
# outcome =1: normal
# outcome =2: prediabetes
# outcome =3: diabetes
dat$outcome[dat$glyhb < 5.70] <- 1
dat$outcome[dat$glyhb >=5.70 & dat$glyhb <=6.50] <-2
dat$outcome[dat$glyhb >6.50] <-3
mod.mul_wh <- multinom(outcome ~ w_h_ratio, data=dat)
## # weights: 9 (4 variable)
## initial value 418.571282
## iter 10 value 243.333915
## iter 20 value 242.883177
## final value 242.882318
## converged
summ.MNfit(mod.mul_wh)
##
## Level 2 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -11.091 2.634 0.000 0.00 0.000 0.003
## w_h_ratio 9.589 2.867 0.001 14596.57 52.923 4025880.330
##
## Level 3 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -8.991 1.799 0 0.000 0.000 0.004
## w_h_ratio 8.353 1.984 0 4241.562 86.887 207059.411
mod.mul_wh_age <- multinom(outcome ~ w_h_ratio+age, data=dat)
## # weights: 12 (6 variable)
## initial value 418.571282
## iter 10 value 226.917988
## iter 20 value 223.943778
## final value 223.918303
## converged
summ.MNfit(mod.mul_wh_age)
##
## Level 2 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -11.196 2.657 0.000 0.000 0.000 0.003
## w_h_ratio 7.268 2.947 0.014 1433.815 4.445 462511.301
## age 0.044 0.014 0.002 1.045 1.016 1.074
##
## Level 3 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -9.268 1.870 0.000 0.000 0.000 0.004
## w_h_ratio 5.638 2.086 0.007 281.010 4.712 16756.945
## age 0.053 0.010 0.000 1.054 1.034 1.075
mod.mul_int <- multinom(outcome~ w_h_ratio + age+w_h_ratio*age, data=dat)
## # weights: 15 (8 variable)
## initial value 418.571282
## iter 10 value 235.145054
## iter 20 value 225.068841
## iter 30 value 223.441915
## iter 40 value 223.356107
## iter 50 value 223.202476
## iter 60 value 223.197788
## final value 223.195536
## converged
summ.MNfit(mod.mul_int)
##
## Level 2 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -14.891 1.788 0.000 0.000 0.000 0.000
## w_h_ratio 11.415 1.911 0.000 90657.952 2143.235 3834793.577
## age 0.114 0.055 0.036 1.121 1.007 1.248
## w_h_ratio:age -0.079 0.057 0.165 0.924 0.827 1.033
##
## Level 3 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -17.687 7.003 0.012 0.000 0.000 1.900000e-02
## w_h_ratio 15.031 7.753 0.053 3370579.635 0.848 1.340151e+13
## age 0.202 0.119 0.089 1.224 0.970 1.544000e+00
## w_h_ratio:age -0.166 0.131 0.205 0.847 0.656 1.095000e+00
anova(mod.mul_wh_age, mod.mul_int, test="Chisq")
## Likelihood ratio tests of Multinomial Models
##
## Response: outcome
## Model Resid. df Resid. Dev Test Df LR stat.
## 1 w_h_ratio + age 756 447.8366
## 2 w_h_ratio + age + w_h_ratio * age 754 446.3911 1 vs 2 2 1.445534
## Pr(Chi)
## 1
## 2 0.4854072
mod.mul_wh_age_chol_bmi <- multinom(outcome ~ w_h_ratio + age + chol + BMI, data=dat)
## # weights: 18 (10 variable)
## initial value 418.571282
## iter 10 value 222.562958
## iter 20 value 215.503183
## final value 215.503008
## converged
summ.MNfit(mod.mul_wh_age_chol_bmi)
##
## Level 2 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -11.217 2.961 0.000 0.000 0.000 0.004
## w_h_ratio 6.886 2.968 0.020 978.477 2.915 328472.200
## age 0.045 0.015 0.002 1.046 1.017 1.076
## chol -0.002 0.005 0.750 0.998 0.988 1.009
## BMI 0.022 0.035 0.524 1.023 0.955 1.096
##
## Level 3 vs. Level 1
## betaHat se pval RRR RRR.lower RRR.upper
## (Intercept) -12.747 2.166 0.000 0.000 0.000 0.000
## w_h_ratio 5.232 2.122 0.014 187.167 2.926 11974.528
## age 0.053 0.010 0.000 1.054 1.033 1.076
## chol 0.009 0.003 0.006 1.009 1.003 1.016
## BMI 0.063 0.023 0.008 1.065 1.017 1.114
co2vs1<-exp(coef(mod.mul_wh_age_chol_bmi)[1,2]*0.1)
co3vs1<-exp(coef(mod.mul_wh_age_chol_bmi)[2,2]*0.1)
exp(log(co2vs1) + c(-1, 1)*1.96*0.1*summary(mod.mul_wh_age_chol_bmi)$standard.errors[1,2])
## [1] 1.112899 3.561677
exp(log(co3vs1) + c(-1, 1)*1.96*0.1*summary(mod.mul_wh_age_chol_bmi)$standard.errors[2,2])
## [1] 1.113312 2.557580
anova(mod.mul_wh_age, mod.mul_wh_age_chol_bmi, test="Chisq")
## Likelihood ratio tests of Multinomial Models
##
## Response: outcome
## Model Resid. df Resid. Dev Test Df LR stat.
## 1 w_h_ratio + age 756 447.8366
## 2 w_h_ratio + age + chol + BMI 752 431.0060 1 vs 2 4 16.83059
## Pr(Chi)
## 1
## 2 0.002085055
mod.mul_wh_age_chol2_bmi <- multinom(outcome ~ w_h_ratio + age + chol+I(chol^2) + BMI, data=dat) #no need
## # weights: 21 (12 variable)
## initial value 418.571282
## iter 10 value 225.327789
## iter 20 value 215.108498
## final value 215.048531
## converged
anova(mod.mul_wh_age_chol_bmi,mod.mul_wh_age_chol2_bmi, test="Chisq") #no need
## Likelihood ratio tests of Multinomial Models
##
## Response: outcome
## Model Resid. df Resid. Dev Test Df
## 1 w_h_ratio + age + chol + BMI 752 431.0060
## 2 w_h_ratio + age + chol + I(chol^2) + BMI 750 430.0971 1 vs 2 2
## LR stat. Pr(Chi)
## 1
## 2 0.9089532 0.6347801
mod.mul_wh_age_chol2_bmi2 <- multinom(outcome ~ w_h_ratio + age + chol+I(chol^2) + BMI+I(BMI^2), data=dat) #no need
## # weights: 24 (14 variable)
## initial value 418.571282
## iter 10 value 240.966870
## iter 20 value 214.198693
## iter 30 value 213.959969
## final value 213.959965
## converged
anova(mod.mul_wh_age_chol_bmi,mod.mul_wh_age_chol2_bmi2, test="Chisq") #no need
## Likelihood ratio tests of Multinomial Models
##
## Response: outcome
## Model Resid. df Resid. Dev
## 1 w_h_ratio + age + chol + BMI 752 431.0060
## 2 w_h_ratio + age + chol + I(chol^2) + BMI + I(BMI^2) 748 427.9199
## Test Df LR stat. Pr(Chi)
## 1
## 2 1 vs 2 4 3.086087 0.5435238
#imputated models
mod.imp.mul_wh <- with(data=tempData, multinom(outcome ~ w_h_ratio))
## # weights: 9 (4 variable)
## initial value 442.740752
## iter 10 value 258.238838
## iter 20 value 257.872597
## final value 257.872097
## converged
## # weights: 9 (4 variable)
## initial value 442.740752
## iter 10 value 259.360601
## iter 20 value 258.922465
## final value 258.921860
## converged
## # weights: 9 (4 variable)
## initial value 442.740752
## iter 10 value 260.002134
## iter 20 value 259.629534
## final value 259.629034
## converged
## # weights: 9 (4 variable)
## initial value 442.740752
## iter 10 value 258.279088
## iter 20 value 257.847393
## final value 257.846741
## converged
## # weights: 9 (4 variable)
## initial value 442.740752
## iter 10 value 256.220717
## iter 20 value 255.781036
## final value 255.780362
## converged
mod.imp.mul_wh_age <- with(data=tempData,multinom(outcome~w_h_ratio + age))
## # weights: 12 (6 variable)
## initial value 442.740752
## iter 10 value 240.840542
## iter 20 value 237.674551
## final value 237.663111
## converged
## # weights: 12 (6 variable)
## initial value 442.740752
## iter 10 value 242.778870
## iter 20 value 240.644084
## final value 240.626569
## converged
## # weights: 12 (6 variable)
## initial value 442.740752
## iter 10 value 243.503945
## iter 20 value 240.621748
## final value 240.608292
## converged
## # weights: 12 (6 variable)
## initial value 442.740752
## iter 10 value 241.496619
## iter 20 value 238.341687
## final value 238.326388
## converged
## # weights: 12 (6 variable)
## initial value 442.740752
## iter 10 value 237.149969
## iter 20 value 234.854788
## final value 234.837024
## converged
mod.imp.mul_wh_age_chol_bmi <- with(data=tempData, multinom(outcome ~ w_h_ratio +age+chol+BMI))
## # weights: 18 (10 variable)
## initial value 442.740752
## iter 10 value 237.210361
## iter 20 value 230.351727
## final value 230.351559
## converged
## # weights: 18 (10 variable)
## initial value 442.740752
## iter 10 value 239.165144
## iter 20 value 232.207976
## final value 232.207774
## converged
## # weights: 18 (10 variable)
## initial value 442.740752
## iter 10 value 239.106805
## iter 20 value 232.381706
## final value 232.381555
## converged
## # weights: 18 (10 variable)
## initial value 442.740752
## iter 10 value 238.290846
## iter 20 value 231.739167
## final value 231.738904
## converged
## # weights: 18 (10 variable)
## initial value 442.740752
## iter 10 value 233.088931
## iter 20 value 226.412468
## final value 226.412102
## converged
table_multi<- matrix(c(AIC(mod.mul_wh), BIC(mod.mul_wh),
AIC(mod.mul_wh_age ),BIC(mod.mul_wh_age ),
AIC(mod.mul_int), BIC(mod.mul_int),
AIC(mod.mul_wh_age_chol_bmi), BIC(mod.mul_wh_age_chol_bmi)
), ncol=2,nrow=4, byrow=TRUE)
colnames(table_multi)<- c("AIC", "BIC")
rownames(table_multi)<- c("log(RRR) ~ w_h_ratio",
"log(RRR) ~ w_h_ratio + age",
"log(RRR) ~ w_h_ratio + age+w_h_ratio*age",
"log(RRR) ~ w_h_ratio + age + chol + BMI")
kable(table_multi)
| log(RRR) ~ w_h_ratio |
493.7646 |
509.5358 |
| log(RRR) ~ w_h_ratio + age |
459.8366 |
483.4934 |
| log(RRR) ~ w_h_ratio + age+w_h_ratio*age |
462.3911 |
493.9335 |
| log(RRR) ~ w_h_ratio + age + chol + BMI |
451.0060 |
490.4340 |
plot(mod.mul_wh$fitted.values[,1][order(dat$w_h_ratio)] ~ sort(dat$w_h_ratio), type="l", col="dodgerblue", xlab=c("Waist/hip ratio"), ylab="Predicted Probability", ylim=c(0,1), main="Fitted Values, o1=Blue, o2=Pink, o3=Green")
points(mod.mul_wh$fitted.values[,2][order(dat$w_h_ratio)] ~ sort(dat$w_h_ratio), type="l", col="magenta")
points(mod.mul_wh$fitted.values[,3][order(dat$w_h_ratio)]~sort(dat$w_h_ratio), type="l", col="green")

Ordinal Model
library(VGAM)
## Loading required package: stats4
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:caret':
##
## predictors
## The following object is masked from 'package:tidyr':
##
## fill
# outcome =1: normal
# outcome =2: prediabetes
# outcome =3: diabetes
ord.wh <- vglm(outcome ~ w_h_ratio, cumulative(parallel=TRUE, reverse=TRUE), data=dat)
ord.wh_age <- vglm(outcome~ w_h_ratio + age, cumulative(parallel=TRUE, reverse=TRUE), data=dat)
ord.int <- vglm(outcome~w_h_ratio + age + w_h_ratio *age, cumulative(parallel = TRUE, reverse = TRUE),data=dat)
ord.wh_age_chol_bmi <- vglm(outcome ~ w_h_ratio + age + chol+ BMI, cumulative(parallel = TRUE, reverse = TRUE), data=dat)
ord.wh_age_chol2_bmi <- vglm(outcome ~ w_h_ratio + age + chol+I(chol^2)+ BMI, cumulative(parallel = TRUE, reverse = TRUE), data=dat) #no need
ord.wh_age_chol2_bmi2 <- vglm(outcome ~ w_h_ratio + age + chol+I(chol^2)+ BMI+I(BMI^2), cumulative(parallel = TRUE, reverse = TRUE), data=dat) #no need
summary(ord.int)
##
## Call:
## vglm(formula = outcome ~ w_h_ratio + age + w_h_ratio * age, family = cumulative(parallel = TRUE,
## reverse = TRUE), data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -17.0987 6.3642 NA NA
## (Intercept):2 -17.5656 6.3665 -2.759 0.0058 **
## w_h_ratio 14.9580 7.0567 2.120 0.0340 *
## age 0.1982 0.1090 1.819 0.0689 .
## w_h_ratio:age -0.1658 0.1202 -1.379 0.1679
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 448.1292 on 757 degrees of freedom
##
## Log-likelihood: -224.0646 on 757 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Exponentiated coefficients:
## w_h_ratio age w_h_ratio:age
## 3.134458e+06 1.219196e+00 8.472518e-01
summary(ord.wh_age_chol_bmi)
##
## Call:
## vglm(formula = outcome ~ w_h_ratio + age + chol + BMI, family = cumulative(parallel = TRUE,
## reverse = TRUE), data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -11.453072 1.844428 -6.210 5.31e-10 ***
## (Intercept):2 -11.942093 1.855078 -6.438 1.21e-10 ***
## w_h_ratio 5.288779 1.830869 2.889 0.00387 **
## age 0.048605 0.008860 5.486 4.12e-08 ***
## chol 0.007463 0.002885 2.587 0.00969 **
## BMI 0.049193 0.020344 2.418 0.01560 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 436.1374 on 756 degrees of freedom
##
## Log-likelihood: -218.0687 on 756 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
##
##
## Exponentiated coefficients:
## w_h_ratio age chol BMI
## 198.101337 1.049805 1.007491 1.050423
confint(ord.wh_age_chol_bmi)
## 2.5 % 97.5 %
## (Intercept):1 -15.068084033 -7.83805940
## (Intercept):2 -15.577979316 -8.30620665
## w_h_ratio 1.700340926 8.87721648
## age 0.031239361 0.06597006
## chol 0.001808504 0.01311682
## BMI 0.009320564 0.08906582
exp(coef(ord.wh_age_chol_bmi)[3])
## w_h_ratio
## 198.1013
or2vs1<-exp(coef(ord.wh_age_chol_bmi)[3]*0.1)
exp(coef(ord.wh_age_chol_bmi)[3]*0.1 + c(-1, 1)*1.96*0.1*sqrt(vcov(ord.wh_age_chol_bmi)[3,3]))
## [1] 1.185337 2.429604
table_ord<- matrix(c(AIC(ord.wh), BIC(ord.wh),
AIC(ord.wh_age),BIC(ord.wh_age),
AIC(ord.int), BIC(ord.int),
AIC(ord.wh_age_chol_bmi), BIC(ord.wh_age_chol_bmi),
AIC(ord.wh_age_chol2_bmi),BIC(ord.wh_age_chol2_bmi),
AIC(ord.wh_age_chol2_bmi2), BIC(ord.wh_age_chol2_bmi2)
), ncol=2,nrow=6, byrow=TRUE)
colnames(table_ord)<- c("AIC", "BIC")
rownames(table_ord)<- c("log(OR) ~ w_h_ratio",
"log(OR) ~ w_h_ratio + age",
"log(OR) ~ w_h_ratio + age + w_h_ratio*age",
"log(OR) ~ w_h_ratio+ age+chol+bmi",
"log(OR) ~ w_h_ratio+age+chol+I(chol^2)+bmi",
"log(OR) ~ w_h_ratio + age + chol + I(chol^2)+ bmi+I(bmi^2)")
table1 <- as.table(table_ord)
kable(table_ord)
| log(OR) ~ w_h_ratio |
493.0312 |
504.8596 |
| log(OR) ~ w_h_ratio + age |
458.0683 |
473.8395 |
| log(OR) ~ w_h_ratio + age + w_h_ratio*age |
458.1292 |
477.8432 |
| log(OR) ~ w_h_ratio+ age+chol+bmi |
448.1374 |
471.7942 |
| log(OR) ~ w_h_ratio+age+chol+I(chol^2)+bmi |
448.6343 |
476.2339 |
| log(OR) ~ w_h_ratio + age + chol + I(chol^2)+ bmi+I(bmi^2) |
449.7714 |
481.3138 |
# check for proportional odds assumption
dat$indicator3 <- ifelse(dat$outcome==3, 1,0)
mod.3v12 <- glm(indicator3 ~ w_h_ratio +age+chol+BMI, family = binomial, data = dat)
summary(mod.3v12)
##
## Call:
## glm(formula = indicator3 ~ w_h_ratio + age + chol + BMI, family = binomial,
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5446 -0.5734 -0.3827 -0.2251 2.6566
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.651098 2.077498 -5.608 2.04e-08 ***
## w_h_ratio 4.187441 2.037622 2.055 0.03987 *
## age 0.047304 0.010042 4.711 2.47e-06 ***
## chol 0.009573 0.003241 2.954 0.00314 **
## BMI 0.059550 0.022953 2.594 0.00947 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 341.71 on 380 degrees of freedom
## Residual deviance: 281.91 on 376 degrees of freedom
## AIC: 291.91
##
## Number of Fisher Scoring iterations: 5
dat$indicator23 <- ifelse(dat$outcome==1, 0 ,1)
mod.32v1 <- glm(indicator23 ~ w_h_ratio + age+chol+BMI, family=binomial,data=dat )
#examining 95% CIs for the two coefficients
coef(mod.3v12)[2] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[2,2])
## [1] 0.1937014 8.1811804
coef(mod.3v12)[3] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[3,3])
## [1] 0.02762239 0.06698582
coef(mod.3v12)[4] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[4,4])
## [1] 0.003220756 0.015924463
coef(mod.3v12)[5] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[5,5])
## [1] 0.01456302 0.10453762
coef(mod.32v1)[2] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[2,2])
## [1] 2.014893 9.487432
coef(mod.32v1)[3] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[3,3])
## [1] 0.03231843 0.06819021
coef(mod.32v1)[4] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[4,4])
## [1] 0.0004873271 0.0121938689
coef(mod.32v1)[5] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[5,5])
## [1] 0.009750427 0.091846086
# CI’s for all variables' beta coefficients overlap, so proportional odds assumption holds
# Another way
# Fitting a generalized ordinal model (without the proportional odds assumption)
mod.no_asp <- vglm(outcome ~ w_h_ratio + age + chol+BMI, cumulative(parallel=FALSE, reverse=T), data=dat)
# Conducting the likelihood ratio test
pchisq(deviance(ord.wh_age_chol_bmi) - deviance(mod.no_asp), df=df.residual(ord.wh_age_chol_bmi)-df.residual(mod.no_asp), lower.tail=F)